import io

import requests
from lxml import etree
import re
import base64
from fontTools.ttLib import TTFont
# url = 'https://sz.58.com/chuzu/'
#
# headers = {
#     'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
# }
#
# # Fetch the web page (response object)
# r = requests.get(url=url, headers=headers)
#
# # print(r.text)
# with open('58同城.html','w',encoding='utf-8') as f:
#     f.write(r.text)

# Load the locally cached copy of the page; presumably this file was produced
# by the commented-out download step above.
with open('58同城.html', 'r', encoding='utf-8') as f:
    read_58 = f.read()

# Extract the font payload: everything between "base64," and the closing "')".
# NOTE(review): this looks like an inline CSS url('data:...;base64,...') font
# declaration -- confirm against the saved page.
# A raw string is used so that `\)` reaches the regex engine as an escaped
# parenthesis instead of being an invalid Python string escape.
font_match = re.search(r"base64,(.*?)'\)", read_58)
if font_match is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise ValueError("no embedded base64 font found in '58同城.html'")
ttf_str = font_match.group(1)
print(ttf_str)

# Decode the base64 text into the raw font bytes.
ttf_base64 = base64.b64decode(ttf_str)

# Parse the font straight from memory (no temporary .ttf file on disk).
xml = TTFont(io.BytesIO(ttf_base64))

# To dump the font as XML for inspection: xml.saveXML('58同城.xml')
# Print the font's preferred character map (code point -> glyph name).
print(xml.get('cmap').getBestCmap())